# ---------------------------------------------------------------------------------------------------- #
# sample2.
# ---------------------------------------------------------------------------------------------------- #
#
# Date: 2014-06-18
# Authors: Jaime E. Settle, Robert M. Bond, Lorenzo Coviello, Christopher J. Fariss, James H. Fowler, Jason J. Jones, Adam D. I. Kramer, Cameron Marlow
#
# All inquiries about the models and code should be sent to Jaime Settle and Chris Fariss
# Contact: jsettle@wm.edu cjf0006@gmail.com
#
# Title: From Posting to Voting: The Effects of Political Competition on Online Political Engagement
# Most recent version available at: http://ssrn.com/abstract=2232099
#
# Journal: Political Science Research & Methods
#
# Copyright (c) 2014, The R code is under the Creative Commons Attribution-Noncommercial-Share Alike 3.0 United States License.
# For more information see: http://creativecommons.org/licenses/by-nc-sa/3.0/us/
# All rights reserved.
#
# The data are available by request through Facebook Inc. Interested parties need to contact Facebook Inc. directly to obtain secure access to the data.
#
# R code begins below
# ---------------------------------------------------------------------------------------------------- #
#
#
#
#
############################################################
#-------------Adding in Other Variables-------------------#
###########################################################

# Bring the status-update file into the workspace. It came off of the
# Facebook server and has one row per status update from the sample.
# The code further down expects this to provide an object called `data`.
load("/home/jfowler/Discussion/JMData/StateFileStatuses/data.rda")

# Start the run timer (the load above is therefore not part of the timing),
# then bring in the matched sample file, sampledecile.
d <- Sys.time()
load("/home/jfowler/Discussion/JMData/sampledecile.rda")

# Per-user tallies over every row of `data`. Each result is a data frame with
# a userid column and a Freq column: for userposts Freq is the number of rows
# per user; for the others it is the per-user sum of the column named on the
# left-hand side of the formula.
userposts <- as.data.frame(
  xtabs(formula = ~ userid, data = data)
)
userpol75 <- as.data.frame(
  xtabs(formula = political75 ~ userid, data = data)
)
userpol90 <- as.data.frame(
  xtabs(formula = political90 ~ userid, data = data)
)
useremopol75 <- as.data.frame(
  xtabs(formula = emopolitical75 ~ userid, data = data)
)

# Primary season: status updates with days < 160. Rows whose comparison is NA
# are dropped by which().
primary <- data[which(data$days < 160), ]

# Number of posts per user in this period.
userpostsprimary <- as.data.frame(xtabs(~ userid, data = primary))

# Posts with a high probability of political discussion (polprob >= 0.75),
# and the per-user sum of political75 among them.
primary75 <- primary[which(primary$polprob >= 0.75), ]
userpol75primary <- as.data.frame(xtabs(political75 ~ userid, data = primary75))

# Same with the stricter polprob >= 0.90 cut-off.
primary90 <- primary[which(primary$polprob >= 0.90), ]
userpol90primary <- as.data.frame(xtabs(political90 ~ userid, data = primary90))

# Posts flagged emopolitical75 == 1, and the per-user sum of that flag.
emoprimary75 <- primary[which(primary$emopolitical75 == 1), ]
useremopol75primary <- as.data.frame(xtabs(emopolitical75 ~ userid, data = emoprimary75))


# Summer: status updates with 160 <= days < 238.
# (The original line had an unbalanced opening parenthesis inside which(),
# which made the script fail to parse; the condition itself is unchanged.)
summer <- data[which(data$days >= 160 & data$days < 238), ]

# Within the summer rows, keep posts by probability of political discussion
# (polprob >= 0.75 / >= 0.90) and posts flagged emopolitical75 == 1.
summer75 <- subset(summer, polprob >= .75)
summer90 <- subset(summer, polprob >= .90)
emosummer75 <- subset(summer, emopolitical75 == 1)

# Per-user crosstabs: Freq is the row count for userpostssummer and the
# per-user sum of the left-hand-side column for the other three tables.
userpostssummer <- as.data.frame(xtabs(~ userid, data = summer))
userpol75summer <- as.data.frame(xtabs(political75 ~ userid, data = summer75))
userpol90summer <- as.data.frame(xtabs(political90 ~ userid, data = summer90))
useremopol75summer <- as.data.frame(xtabs(emopolitical75 ~ userid, data = emosummer75))


# September window: status updates with 238 <= days <= 274.
september <- data[which(data$days >= 238 & data$days <= 274), ]

# Number of posts per user in this window.
userpostsseptember <- as.data.frame(xtabs(~ userid, data = september))

# polprob >= 0.75 posts and the per-user sum of political75 among them.
september75 <- september[which(september$polprob >= 0.75), ]
userpol75september <- as.data.frame(xtabs(political75 ~ userid, data = september75))

# polprob >= 0.90 posts and the per-user sum of political90 among them.
september90 <- september[which(september$polprob >= 0.90), ]
userpol90september <- as.data.frame(xtabs(political90 ~ userid, data = september90))

# emopolitical75 == 1 posts and the per-user sum of that flag.
emoseptember75 <- september[which(september$emopolitical75 == 1), ]
useremopol75september <- as.data.frame(xtabs(emopolitical75 ~ userid, data = emoseptember75))


# October window: status updates with 275 <= days <= 312.
october <- data[which(data$days >= 275 & data$days <= 312), ]

# Number of posts per user in this window.
userpostsoctober <- as.data.frame(xtabs(~ userid, data = october))

# polprob >= 0.75 posts and the per-user sum of political75 among them.
october75 <- october[which(october$polprob >= 0.75), ]
userpol75october <- as.data.frame(xtabs(political75 ~ userid, data = october75))

# polprob >= 0.90 posts and the per-user sum of political90 among them.
october90 <- october[which(october$polprob >= 0.90), ]
userpol90october <- as.data.frame(xtabs(political90 ~ userid, data = october90))

# emopolitical75 == 1 posts and the per-user sum of that flag.
emooctober75 <- october[which(october$emopolitical75 == 1), ]
useremopol75october <- as.data.frame(xtabs(emopolitical75 ~ userid, data = emooctober75))


# Election day: status updates with days == 309 (a single day that also falls
# inside the October window used elsewhere in this script).
electionday <- data[which(data$days == 309), ]

# Number of posts per user on that day.
userpostselecday <- as.data.frame(xtabs(~ userid, data = electionday))

# polprob >= 0.75 posts and the per-user sum of political75 among them.
electionday75 <- electionday[which(electionday$polprob >= 0.75), ]
userpol75elecday <- as.data.frame(xtabs(political75 ~ userid, data = electionday75))

# polprob >= 0.90 posts and the per-user sum of political90 among them.
electionday90 <- electionday[which(electionday$polprob >= 0.90), ]
userpol90elecday <- as.data.frame(xtabs(political90 ~ userid, data = electionday90))

# emopolitical75 == 1 posts and the per-user sum of that flag.
emoelectionday75 <- electionday[which(electionday$emopolitical75 == 1), ]
useremopol75elecday <- as.data.frame(xtabs(emopolitical75 ~ userid, data = emoelectionday75))



# Post-season: status updates with days > 312.
# NOTE(review): the four tables built here are not merged into `sample` in the
# part of the script visible here -- confirm whether that is intentional.
postseason <- data[which(data$days > 312), ]

# Number of posts per user in this period.
userpostspostseason <- as.data.frame(xtabs(~ userid, data = postseason))

# polprob >= 0.75 posts and the per-user sum of political75 among them.
postseason75 <- postseason[which(postseason$polprob >= 0.75), ]
userpol75postseason <- as.data.frame(xtabs(political75 ~ userid, data = postseason75))

# polprob >= 0.90 posts and the per-user sum of political90 among them.
postseason90 <- postseason[which(postseason$polprob >= 0.90), ]
userpol90postseason <- as.data.frame(xtabs(political90 ~ userid, data = postseason90))

# emopolitical75 == 1 posts and the per-user sum of that flag.
emopostseason75 <- postseason[which(postseason$emopolitical75 == 1), ]
useremopol75postseason <- as.data.frame(xtabs(emopolitical75 ~ userid, data = emopostseason75))



# Elapsed time since the timer `d` was started
difftime(Sys.time(), d)

# The status-update object is no longer needed; free it
rm(data)

# Carry on under the name `sample` and discard the old name
sample <- sampledecile
rm(sampledecile)

# ----------------------------Variable Creation---------------#
# Here we create frequency variables for the number of times a user creates a post with political content

#-----------------------------Total Time Period---------------#
# Look up each sampled user's count in the whole-period crosstabs; users that
# do not appear in a crosstab get 0.
sample$totalposts <- userposts$Freq[match(sample$userid, userposts$userid)]
sample$totalposts[is.na(sample$totalposts)] <- 0

sample$pol75posts <- userpol75$Freq[match(sample$userid, userpol75$userid)]
sample$pol75posts[is.na(sample$pol75posts)] <- 0

sample$pol90posts <- userpol90$Freq[match(sample$userid, userpol90$userid)]
sample$pol90posts[is.na(sample$pol90posts)] <- 0

# Share of all posts that are political. The division is not guarded, so a
# user with zero total posts gets NaN here.
sample$pol75prop <- with(sample, pol75posts / totalposts)
sample$pol90prop <- with(sample, pol90posts / totalposts)

# emopolitical75 count, and its share of the pol75 count.
sample$emopol75posts <- useremopol75$Freq[match(sample$userid, useremopol75$userid)]
sample$emopol75posts[is.na(sample$emopol75posts)] <- 0
sample$emopol75prop <- with(sample, emopol75posts / pol75posts)

#------------------------Primary Season--------------------#

# Look up each sampled user's count in the primary-season crosstabs; users
# that do not appear in a crosstab get 0.
sample$primarytotal <- userpostsprimary$Freq[match(sample$userid, userpostsprimary$userid)]
sample$primarytotal[is.na(sample$primarytotal)] <- 0

sample$primarypol75posts <- userpol75primary$Freq[match(sample$userid, userpol75primary$userid)]
sample$primarypol75posts[is.na(sample$primarypol75posts)] <- 0

sample$primarypol90posts <- userpol90primary$Freq[match(sample$userid, userpol90primary$userid)]
sample$primarypol90posts[is.na(sample$primarypol90posts)] <- 0

# Share of primary-season posts that are political (NaN when the user has no
# posts in the period, since the division is not guarded).
sample$primarypol75prop <- with(sample, primarypol75posts / primarytotal)
sample$primarypol90prop <- with(sample, primarypol90posts / primarytotal)

# emopolitical75 count, and its share of the pol75 count.
sample$primaryemopol75posts <- useremopol75primary$Freq[match(sample$userid, useremopol75primary$userid)]
sample$primaryemopol75posts[is.na(sample$primaryemopol75posts)] <- 0
sample$primaryemopol75prop <- with(sample, primaryemopol75posts / primarypol75posts)



#------------------------Summer--------------------#

# Look up each sampled user's count in the summer crosstabs; users that do
# not appear in a crosstab get 0.
sample$summertotal <- userpostssummer$Freq[match(sample$userid, userpostssummer$userid)]
sample$summertotal[is.na(sample$summertotal)] <- 0

sample$summerpol75posts <- userpol75summer$Freq[match(sample$userid, userpol75summer$userid)]
sample$summerpol75posts[is.na(sample$summerpol75posts)] <- 0

sample$summerpol90posts <- userpol90summer$Freq[match(sample$userid, userpol90summer$userid)]
sample$summerpol90posts[is.na(sample$summerpol90posts)] <- 0

# Share of summer posts that are political (NaN when the user has no posts
# in the period, since the division is not guarded).
sample$summerpol75prop <- with(sample, summerpol75posts / summertotal)
sample$summerpol90prop <- with(sample, summerpol90posts / summertotal)

# emopolitical75 count, and its share of the pol75 count.
sample$summeremopol75posts <- useremopol75summer$Freq[match(sample$userid, useremopol75summer$userid)]
sample$summeremopol75posts[is.na(sample$summeremopol75posts)] <- 0
sample$summeremopol75prop <- with(sample, summeremopol75posts / summerpol75posts)


#----------------------------September -----------------------#
# Look up each sampled user's count in the September crosstabs; users that do
# not appear in a crosstab get 0.
sample$septembertotal <- userpostsseptember$Freq[match(sample$userid, userpostsseptember$userid)]
sample$septembertotal[is.na(sample$septembertotal)] <- 0

sample$septemberpol75posts <- userpol75september$Freq[match(sample$userid, userpol75september$userid)]
sample$septemberpol75posts[is.na(sample$septemberpol75posts)] <- 0

sample$septemberpol90posts <- userpol90september$Freq[match(sample$userid, userpol90september$userid)]
sample$septemberpol90posts[is.na(sample$septemberpol90posts)] <- 0

# Share of September posts that are political (NaN when the user has no
# posts in the window, since the division is not guarded).
sample$septemberpol75prop <- with(sample, septemberpol75posts / septembertotal)
sample$septemberpol90prop <- with(sample, septemberpol90posts / septembertotal)

# emopolitical75 count, and its share of the pol75 count.
sample$septemberemopol75posts <- useremopol75september$Freq[match(sample$userid, useremopol75september$userid)]
sample$septemberemopol75posts[is.na(sample$septemberemopol75posts)] <- 0
sample$septemberemopol75prop <- with(sample, septemberemopol75posts / septemberpol75posts)

#----------------------------October---------------------------------#

# Look up each sampled user's count in the October crosstabs; users that do
# not appear in a crosstab get 0.
sample$octobertotal <- userpostsoctober$Freq[match(sample$userid, userpostsoctober$userid)]
sample$octobertotal[is.na(sample$octobertotal)] <- 0

sample$octoberpol75posts <- userpol75october$Freq[match(sample$userid, userpol75october$userid)]
sample$octoberpol75posts[is.na(sample$octoberpol75posts)] <- 0

sample$octoberpol90posts <- userpol90october$Freq[match(sample$userid, userpol90october$userid)]
sample$octoberpol90posts[is.na(sample$octoberpol90posts)] <- 0

# Share of October posts that are political (NaN when the user has no posts
# in the window, since the division is not guarded).
sample$octoberpol75prop <- with(sample, octoberpol75posts / octobertotal)
sample$octoberpol90prop <- with(sample, octoberpol90posts / octobertotal)

# emopolitical75 count, and its share of the pol75 count.
sample$octoberemopol75posts <- useremopol75october$Freq[match(sample$userid, useremopol75october$userid)]
sample$octoberemopol75posts[is.na(sample$octoberemopol75posts)] <- 0
sample$octoberemopol75prop <- with(sample, octoberemopol75posts / octoberpol75posts)

#------------------------------Election Day--------------------------#

# Look up each sampled user's count in the election-day crosstabs; users that
# do not appear in a crosstab get 0.
sample$elecdayposts <- userpostselecday$Freq[match(sample$userid, userpostselecday$userid)]
sample$elecdayposts[is.na(sample$elecdayposts)] <- 0

sample$pol75postselecday <- userpol75elecday$Freq[match(sample$userid, userpol75elecday$userid)]
sample$pol75postselecday[is.na(sample$pol75postselecday)] <- 0

sample$pol90postselecday <- userpol90elecday$Freq[match(sample$userid, userpol90elecday$userid)]
sample$pol90postselecday[is.na(sample$pol90postselecday)] <- 0

# Share of election-day posts that are political (NaN when the user has no
# posts that day, since the division is not guarded).
sample$pol75propelecday <- with(sample, pol75postselecday / elecdayposts)
sample$pol90propelecday <- with(sample, pol90postselecday / elecdayposts)

# emopolitical75 count, and its share of the pol75 count.
sample$emopol75postselecday <- useremopol75elecday$Freq[match(sample$userid, useremopol75elecday$userid)]
sample$emopol75postselecday[is.na(sample$emopol75postselecday)] <- 0
sample$emopol75propelecday <- with(sample, emopol75postselecday / pol75postselecday)

# Elapsed time since the timer `d` was started
difftime(Sys.time(), d)



##############################################
#--------Other Misc.-------------------------#
##############################################

# These variables were not used in any analysis, but they were created
# originally and are kept so that column numbers still match up for any
# hard-coded analyses later on. Do not reorder or drop them.
#
# Each opinleader* flag starts as NA, is set to 0 for users meeting a minimum
# primary-season post count, and is set to 1 for users whose primarypol75prop
# exceeds a quantile of that variable (computed with missing values removed).
# `probs` and `TRUE` are spelled out in full: the original relied on partial
# matching of `prob` and on the reassignable shorthand `T`.

# 98th percentile cut-off; eligible for 0 when primarytotal > 0
sample$opinleader <- NA
sample$opinleader[which(sample$primarytotal > 0)] <- 0
sample$opinleader[which(sample$primarypol75prop > quantile(sample$primarypol75prop, probs = .98, na.rm = TRUE))] <- 1

# 98th percentile cut-off; both the 0 and the 1 require primarytotal >= 10
sample$opinleader2 <- NA
sample$opinleader2[which(sample$primarytotal >= 10)] <- 0
sample$opinleader2[which((sample$primarypol75prop > quantile(sample$primarypol75prop, probs = .98, na.rm = TRUE)) & (sample$primarytotal >= 10))] <- 1


# 90th percentile cut-off; eligible for 0 when primarytotal > 0
sample$opinleader3 <- NA
sample$opinleader3[which(sample$primarytotal > 0)] <- 0
sample$opinleader3[which(sample$primarypol75prop > quantile(sample$primarypol75prop, probs = .90, na.rm = TRUE))] <- 1

# 90th percentile cut-off; both the 0 and the 1 require primarytotal >= 10
sample$opinleader4 <- NA
sample$opinleader4[which(sample$primarytotal >= 10)] <- 0
sample$opinleader4[which((sample$primarypol75prop > quantile(sample$primarypol75prop, probs = .90, na.rm = TRUE)) & (sample$primarytotal >= 10))] <- 1

# Constant column equal to 1 for every row
sample$countdum <- 1



##############################################
#---------------Saving----------------------#
###############################################

# Copy the finished data frame to the name it is stored under, then write
# that single object to disk.
sample2 <- sample
save(list = "sample2", file = "/home/jfowler/Discussion/JMData/sample2.rda")


#--------------------End of File-----------------------#

